url: jtimm.net | git: jaytimm | twitter: DrJayTimm
Updated: 2020-11-02
An R-based guide to accessing, exploring & visualizing US political data via a collection of publicly available resources, including election returns for presidential and congressional races, political ideology scores for US lawmakers, and census-based characterizations of US congressional districts.
Election returns used in this guide have been collated from Daily Kos, MIT Election Data and Science Lab and Wikipedia; the R packages Rvoteview & tidycensus are used extensively to characterize lawmakers/voting behavior and district demographics, respectively.
Hopefully a useful open source & transparent framework for investigating past & future election results and congresses using R. All work presented here can be reproduced in its entirety. A stand-alone html version of this guide can be downloaded here.
library(tidyverse)
library(sf)
library(tigris)
options(tigris_use_cache = TRUE, tigris_class = "sf")
nonx <- c('78', '69', '66', '72', '60', '15', '02')
states_sf <- tigris::states(cb = TRUE) %>%
rename(state_code = STATEFP, state_abbrev = STUSPS)
states <- states_sf %>%
data.frame() %>%
select(state_code, state_abbrev)
uscds <- tigris::congressional_districts(cb = TRUE) %>%
select(GEOID) %>%
mutate(state_code = substr(GEOID, 1, 2),
district_code = substr(GEOID, 3, 4))
laea <- sf::st_crs("+proj=laea +lat_0=30 +lon_0=-95")
theme_guide <- function () {
theme(axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.title.y=element_blank(),
axis.text.y=element_blank(),
legend.title=element_blank(),
legend.position = 'none',
complete = F) }
Per VoteView definition: The South = Dixie + Kentucky + Oklahoma
south <- c('SC', 'MS', 'FL',
'AL', 'GA', 'LA', 'TX',
'VA', 'AR', 'NC', 'TN',
'OK', 'KY')
swing <- c('AZ', 'FL', 'GA', 'MI',
'MN', 'NC', 'PA', 'WI')
states_sf %>%
filter(!state_code %in% nonx) %>%
mutate(south = ifelse(state_abbrev %in% south,
'south', 'not'),
swing = ifelse(state_abbrev %in% swing,
'swing', 'not')) %>%
select(state_abbrev, geometry, south, swing) %>%
gather(-state_abbrev, -geometry, key = 'type', value = 'val') %>%
mutate(label = ifelse(!grepl('not', val), state_abbrev, NA)) %>%
sf::st_transform(laea) %>%
ggplot() +
geom_sf(aes(fill = val),
color = 'white', size = .15,
alpha = 0.65) +
ggsflabel::geom_sf_text(aes(label = label),
size = 2.25,
color='black') +
scale_fill_manual(values = c('#8faabe',
'#1a476f',
'#55752f')) +
theme_minimal() +
theme_guide() +
theme(panel.background =
element_rect(fill = '#d5e4eb', color = NA)) +
facet_wrap(~type, ncol = 1)
The VoteView project provides roll call-based political ideology scores for all lawmakers in the history of the US Congress. The R package
Rvoteviewprovides access to these data.
## NOTE: election years. term begins year + 1
ccr <- data.frame(year = c(1786 + 2*rep(c(1:116))),
congress = c(1:116))
con <- 63 #66
vvo <- lapply(c('house', 'senate'), function(x) {
Rvoteview::download_metadata(type = 'members',
chamber = x) %>%
filter(congress > con & chamber != 'President') }) #66
## [1] "/tmp/RtmpeFXln3/Hall_members.csv"
## [1] "/tmp/RtmpeFXln3/Sall_members.csv"
congress <- vvo %>%
bind_rows() %>%
mutate(x = length(unique(district_code))) %>%
ungroup() %>%
mutate(district_code = ifelse(x==1, 0, district_code)) %>%
mutate(district_code =
stringr::str_pad (as.numeric(district_code),
2, pad = 0),
southerner = ifelse(state_abbrev %in% south,
'South', 'Non-south'),
party_name = case_when (party_code == 100 ~ 'Democrat',
party_code == 200 ~ 'Republican',
!party_code %in% c(100, 200) ~ 'other')) %>%
left_join(ccr, by = 'congress')
The
uspolspackage is my attempt at taming publicly available US election data. The package collates data from Daily Kos, MEDSL & Wikipedia in a uniform format. Importantly, package documentation details all data transformation processes from raw data to package table. So, if you take issue with a data point, check out the documentation and let me know. Why election return data are so nebulous from an accessibility standpoint is absolutely beyond me.
library(devtools)
devtools::install_github("jaytimm/uspols")
library(uspols)
Historical Presidential election results by state via Wikipedia. Equal-area state geometry via Daily Kos.
mp <- uspols::xsf_TileOutv10 %>%
left_join(uspols::uspols_wiki_pres %>%
filter(year > 1971) %>%
mutate(margins = republican - democrat))
mp %>%
ggplot() +
geom_sf(aes(fill = margins),
color = 'darkgray', lwd = .15) +
geom_sf(data = uspols::xsf_TileInv10,
fill = NA,
show.legend = F,
color = NA,
lwd=.5) +
ggsflabel::geom_sf_text(data = uspols::xsf_TileInv10,
aes(label = state_abbrev),
size = 1.5,
color='black') +
scale_fill_distiller(palette = "RdBu",
limit = max(abs(mp$margins)) * c(-1, 1)) +
facet_wrap(~year, ncol = 4) +
theme_minimal()+ theme_guide() +
labs(title = "Voting margins in Presidential elections since 1972")
clean_prex <- uspols::uspols_wiki_pres %>%
mutate(winner = gsub('Franklin D. Roosevelt', 'FDR', winner),
winner = gsub('Lyndon B. Johnson', 'LBJ', winner),
winner = gsub('Hillary Clinton', 'HRC', winner))
last_dem <- clean_prex %>%
group_by(state_abbrev, party_win) %>%
filter(year == max(year),
party_win == 'democrat') %>%
ungroup() %>%
mutate(lab = paste0(year, ' - ', winner))
Nine US states have not voted for a Democratic Presidential candidate since LBJ.
new1 <- uspols::xsf_TileInv10 %>%
left_join(last_dem, by ='state_abbrev') %>%
mutate(label = paste0(state_abbrev,
'\n',
year,
'\n',
gsub('^.* ', '', winner)))
uspols::xsf_TileOutv10 %>%
left_join(last_dem, by ='state_abbrev') %>%
ggplot() +
geom_sf(aes(fill = winner),
color = 'white' ,
alpha = .65) +
ggsflabel::geom_sf_text(data = new1,
aes(label = new1$label),
size = 3,
color = 'black') +
theme_minimal() +
theme_guide() +
theme(legend.position = 'none') +
ggthemes::scale_fill_economist()+
labs(title = "When each state last voted for a Democratic presidential nominee")
A Senate delegation for a given state is said to be split when comprised of Senators from different parties, eg, one Republican and one Democrat – as is the case with, eg, West Virginia in the (present) 116th Congress.
sens <- congress %>%
filter(chamber == 'Senate') %>%
mutate(party_name = as.factor(party_name)) %>%
mutate(party_name = forcats::fct_relevel(party_name,
'other',
after = 2)) %>%
mutate(year = year + 1) %>%
group_by(year, congress, state_abbrev) %>%
mutate(layer = row_number())%>%
slice(1:2) %>%
ungroup() %>%
arrange (year, state_abbrev, party_name) %>%
select(year, congress, state_abbrev, party_name, layer)
sens2 <- sens %>%
filter(congress %in% c(68, 74, 80,
86, 92, 98,
104, 110, 116))
uspols::xsf_TileOutv10 %>%
left_join(sens2 %>% filter(layer == 2)) %>%
ggplot() +
geom_sf(aes(fill = party_name),
color = 'white',
lwd = 0.2,
alpha = .85) +
geom_sf(data = uspols::xsf_TileInv10 %>%
left_join(sens2 %>% filter (layer == 1)),
aes(fill = party_name),
color = 'white',
lwd = 0.2,
alpha = .7) +
ggsflabel::geom_sf_text(data = uspols::xsf_TileInv10,
aes(label = state_abbrev),
size = 1.55,
color = 'white') +
ggthemes::scale_fill_stata()+
theme_minimal() +
theme_guide() +
theme(legend.position = 'bottom') +
facet_wrap(~year + congress) +
labs(title = "Senate composition by state since 1923",
caption = 'Data sources: Daily Kos & VoteView')
split_senate <- sens %>%
group_by(year, congress, state_abbrev) %>%
summarize(splits = length(unique(party_name)),
parts = paste0(party_name, collapse = '-')) %>%
mutate(parts = ifelse(splits == 2, 'Split', paste0('Both ', gsub('-.*$', '', parts))))
split_senate$parts <- factor(split_senate$parts,
levels = c('Both Democrat',
'Split',
'Both Republican',
'Both other'))
split_senate %>%
filter(splits == 2) %>%
group_by(year, congress) %>%
summarize(n = n()) %>%
ggplot() +
geom_bar(aes(x = year,
y = n),
color = 'white',
fill = 'steelblue',
stat = 'identity') +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1),
axis.title.x=element_blank()) +
scale_x_continuous(breaks = seq(1915, 2019, 4)) +
labs(title = "Split Senate delegations since 1915")
split_pal <- c('#395f81', '#ead8c3', '#9e5055', '#b0bcc1')
split_senate %>%
group_by(year, congress, parts) %>%
summarize(n = n()) %>%
ggplot(aes(x = year,
y = n,
fill = parts))+
geom_bar(alpha = 0.85,
color = 'gray',
lwd = .25,
stat = 'identity') +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1))+
theme(legend.position = "bottom",
legend.title=element_blank())+
scale_fill_manual(values = split_pal) +
scale_x_continuous(breaks = seq(1915, 2019, 4)) +
xlab('') +
ggtitle('US Senate delegations, by party affiliations')
Split-ticket voting is when a voter casts votes for candidates of different parties across multiple offices in a given election. Split-ticket voting contrasts with straight-ticket voting. Here, we consider one instance of split-ticket voting: President-Senator splits at the state-level.
splits <- uspols::uspols_wiki_pres %>% ###
rename(party_pres = party_win) %>%
filter(year >= 1976) %>%
select(year, state_abbrev, party_pres) %>%
inner_join(uspols::uspols_medsl_senate, ###
by = c('year', 'state_abbrev')) %>%
mutate(split = ifelse(party_pres != party_win, 1, 0)) %>%
complete(year, state_abbrev)
#mutate(split = ifelse(is.na(split), 0, split))
year <- seq(from = 1976, to = 2016, by = 2)
class <- paste0('Class-', c(rep(1:3,7)))
df <- data.frame(year, class)
uspols::xsf_TileOutv10 %>%
left_join(splits, by = 'state_abbrev') %>%
left_join(df, by = 'year') %>%
mutate (split1 = case_when (split == 0 ~ 'Straight-ticket',
split == 1 ~ 'Split-ticket',
is.na(split) ~ 'No-contest')) %>%
ggplot() +
geom_sf(aes(fill = split1),
color = 'black',
lwd = .15,
alpha = 0.65) +
ggsflabel::geom_sf_text(data = uspols::xsf_TileInv10,
aes(label = state_abbrev),
size = 1.75,
color = 'black') +
scale_fill_manual(values = c('#8faabe', '#55752f',
'#dae2ba')) + #,
facet_wrap(~year + class) +
theme_minimal() +
theme_guide() +
theme(legend.position = 'bottom') +
labs(title = "Pres-Senate split-tickets per general election year")
Yearly populations by state scraped from FRED Economic Data, and made avaliable as a table in the
sometablespackage.
library(devtools)
devtools::install_github("jaytimm/sometables")
library(tables)
Senate seats held by Republicans via VoteView; population of states represented by Reublican senators via FRED.
wpops <- sens %>% #yy %>%
left_join(sometables::pop_us_states,
by = c('year', 'state_abbrev')) %>% #yy %>%
group_by(year, party_name) %>%
summarize(n = n(),
#n = sum(n),
pop = sum(pop)) %>%
group_by(year) %>%
mutate(Senate_share = round(n/sum(n) * 100, 1),
Population_share = round(pop/sum(pop) * 100, 1)) %>%
filter(party_name == 'Republican') %>%
select(year, Senate_share, Population_share) %>%
gather(-year, key = 'var', value = 'per')
Gray highlight: Congresses in which (1) GOP senators hold a majority in the Senate AND (2) a minority of Americans are represented by a Repbulican senator.
wpops %>%
ggplot() +
geom_rect(aes(xmin = 2015,
xmax = 2019,
ymin = -Inf,
ymax = Inf),
fill = 'lightgray') +
geom_hline(yintercept = 50, color = 'black', lwd = .2) +
geom_line(aes(x = year,
y = per,
color = var),
size = 1) +
ggthemes::scale_color_few()+
theme_minimal() +
theme(legend.position = 'bottom',
axis.text.x = element_text(angle = 90),
axis.title.x = element_blank(),
legend.title = element_blank()) +
scale_x_continuous(breaks = seq(min(wpops$year), max(wpops$year), 4)) +
labs(subtitle = "Republican Senate share v. Share Americans represented by Republican senator")
congress_south <- congress %>%
filter(party_code %in% c(100, 200), chamber == 'House') %>%
mutate(Member = as.factor(paste0(party_name, ', ', southerner))) %>%
## re-factor
mutate(Member = forcats::fct_relevel(Member,
'Republican, Non-south',
after = 3))
congress_south %>%
group_by(year, Member) %>%
summarize(n = n()) %>%
mutate(n = n/sum(n)) %>%
ggplot(aes(x = year+1,
y = n,
fill = Member)) +
geom_area(alpha = 0.65, color = 'gray') +
geom_hline(yintercept = 0.5, color = 'white', linetype = 2) +
scale_x_continuous(breaks=seq(1921,2018,4)) +
scale_fill_manual(values = c('#1a476f', '#8faabe',
'#e19463', '#913a40')) +
theme_minimal() +
theme(legend.position = 'top',
legend.title=element_blank(),
axis.title.y=element_blank(),
axis.title.x=element_blank(),
axis.text.y=element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1)) +
labs(title = "House composition since 1921")
DW-NOMINATE ideal points in two dimensions. The first dimension captures ideological variation based in the standard liberal-conservative divide. The second captures variation based in social conservatism that crosscuts political affiliation.
congress_south %>%
mutate(year = year + 1) %>%
filter (congress %in% c(84, 88, 92,
96, 100, 104,
108, 112, 116)) %>%
ggplot(aes(x = nominate_dim1,
y = nominate_dim2) ) +
annotate("path",
x=cos(seq(0,2*pi,length.out=300)),
y=sin(seq(0,2*pi,length.out=300)),
color='gray',
size = .25) +
geom_point(aes(color = Member),
size= 1.25,
shape = 17) +
scale_color_manual(values = c('#1a476f', '#8faabe',
'#e19463', '#913a40')) +
facet_wrap(~year + congress) +
theme_minimal() +
theme(legend.title=element_blank(),
legend.position = 'bottom') +
labs(title="The evolution of the Southern Republican",
subtitle = 'In two dimensions: from 1955 to 2019')
anim <- congress_south %>%
ggplot(aes(x = nokken_poole_dim1,
y = nokken_poole_dim2) ) +
annotate("path",
x=cos(seq(0,2*pi,length.out=300)),
y=sin(seq(0,2*pi,length.out=300)),
color='gray',
size = .25) +
geom_point(aes(color = Member),
size= 2.25,
shape = 17) +
scale_color_manual(values = c('#1a476f', '#8faabe',
'#e19463', '#913a40')) +
theme_minimal() +
theme(legend.title=element_blank(),
legend.position = 'bottom') +
labs(title = "Congress: {frame_time}") +
gganimate::transition_time(as.integer(congress)) +
gganimate::ease_aes('linear')
gganimate::animate(anim,
fps = 4,
nframes = 50,
height = 400,
width = 550)
# gganimate::anim_save("ideologies.gif")
Farmers Market Finder Demo
Pew Research generations:
congress %>%
mutate(age = year - born) %>%
filter (party_code %in% c('100', '200'), year > 1960) %>%
group_by(party_name, year) %>%
summarize(age = round(mean(age, na.rm = T), 1)) %>%
mutate(label = if_else(year == max(year) | year == min(year),
age, NULL)) %>%
ggplot() +
geom_line(aes(x = year + 1,
y = age,
color = party_name),
size = .8) +
ggrepel::geom_text_repel(aes(x = year + 1,
y = age,
label = label),
size= 3.25,
nudge_x = 1,
na.rm = TRUE) +
ggthemes::scale_color_stata()+
theme_minimal() +
theme(legend.position = 'none',
axis.text.x = element_text(angle = 90, hjust = 1),
axis.title.x=element_blank()) +
scale_x_continuous(breaks=seq(1963, 2019, 2)) +
labs(title = "Average age of congress members by party")
congress %>%
mutate(age = year - born,
year = year + 1) %>%
filter (party_code %in% c('100', '200'), year > 2007) %>%
## 100 == democrat --
ggplot(aes(age,
fill = party_name)) +
ggthemes::scale_fill_stata()+
theme_minimal() +
theme(legend.position = "none",
axis.title.y=element_blank(),
axis.text.y=element_blank()) +
geom_density(alpha = 0.6, color = 'gray') +
facet_wrap(~year, nrow = 2)+
labs(title="Age distributions in the House since 2009, by party")
freshmen1 <- congress %>%
group_by(icpsr, bioname, party_name) %>%
summarize(min = min(year),
max = max(year)) %>%
group_by(min, party_name) %>%
summarise(count = n()) %>%
ungroup() %>%
filter(min > 1960, party_name != 'other')
labs <- freshmen1 %>%
arrange(desc(min)) %>%
top_n(4, count) %>%
mutate(txt = c('Obama 1st midterm',
'Clinton 1st midterm',
'"Watergate babies"',
'LBJ atop ticket'))
freshmen1 %>%
ggplot() +
geom_line(aes(x = min + 1,
y = count,
color = party_name),
size = 0.8) +
geom_text(data = labs,
aes(x = min,
y = count,
label = txt),
size = 3, nudge_y = 3) +
ggthemes::scale_color_stata()+
theme_minimal() +
theme(legend.position = 'none',
axis.title.x=element_blank(),
axis.text.x = element_text(angle = 90, hjust = 1)) +
scale_x_continuous(breaks=seq(1963,2019,2)) +
labs(title = "Freshman House members by party")
freshmen <- congress %>%
group_by(icpsr, bioname) %>%
mutate(n = length(congress)) %>%
ungroup() %>%
filter(congress == 116) %>% # only correct here -- not older congresses
mutate (Class = case_when (n == 1 ~ 'Freshmen',
n == 2 ~ 'Sophmores',
n > 2 ~ 'Upper-class')) %>%
select(icpsr, party_code, Class)
gens <- sometables::pew_generations %>%
mutate(age = 2020 - end_yr) %>%
filter(order %in% c(2:5))
congress %>%
filter (party_code %in% c('100', '200'),
congress == 116) %>%
mutate(age = year - born,
party_code = ifelse(party_code == '100',
'House Democrats',
'House Republicans')) %>%
left_join(freshmen %>% select(-party_code), by = "icpsr") %>%
## 100 == democrat --
ggplot() +
geom_dotplot(aes(x = age,
color = Class,
fill = Class),
method="histodot",
dotsize = .9,
binpositions = 'all',
stackratio = 1.3,
stackgroups=TRUE,
binwidth = 1) +
geom_vline(xintercept =gens$age - 0.5,
linetype =2,
color = 'black',
size = .25) +
geom_text(data = gens,
aes(x = age + 2.25,
y = 0.95,
label = generation),
size = 3) +
theme_minimal() +
ggthemes::scale_fill_economist() +
ggthemes::scale_color_economist() +
facet_wrap(~party_code, nrow = 2) +
theme(legend.position = "bottom",
axis.title.y=element_blank(),
axis.text.y=element_blank()) +
#ylim (0, .5) +
labs(title = "Age distribution of the 116th House by party")
The US Census/American Community Survey (ACS) make counts/estimates available by congressional district. The R package
tidycensusprovides very clean access to census APIs. Via super convenient ACS Data Profiles.
variable_list <- c(bachelors_higher = 'DP02_0068P',
foreign_born = 'DP02_0093P',
hispanic = 'DP05_0071P',
median_income = 'DP03_0062',
unemployed = 'DP03_0005P',
white = 'DP05_0077P',
black = 'DP05_0078P',
over_65 = 'DP05_0024P',
hs_higher = 'DP02_0067P',
non_english_home = 'DP02_0113P',
computer_home = 'DP02_0152P',
internet_home = 'DP02_0153P')
gen <- tidycensus::get_acs(geography = 'congressional district',
variables = variable_list,
year = 2019,
survey = 'acs1',
geometry = F) %>%
mutate(state_code = substr(GEOID, 1, 2),
district_code = substr(GEOID, 3, 4)) %>%
left_join(states, by = c('state_code')) %>%
select(state_abbrev, district_code, variable, estimate, moe)
Density plots for 12 ACS variables per 435 US congressional districts. Details for New Mexico’s 2nd district summarized in plot below as dashed lines.
base_viz <- gen %>%
ggplot( aes(estimate, fill = variable)) +
geom_density(alpha = 0.65,
color = 'darkgray',
adjust = 2.5) +
scale_fill_manual(
values = colorRampPalette(ggthemes::economist_pal()(6))(12)) +
facet_wrap(~variable, scale = 'free', ncol = 4)+
theme_minimal() +
theme(legend.position = "none",
axis.text.y=element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank())
nm02 <- gen %>%
filter(state_abbrev == 'NM' & district_code == '02')
base_viz +
geom_vline (data = nm02,
aes(xintercept=estimate),
linetype = 2) +
labs(title = "A 2019 demographic profile",
subtitle = "New Mexico's 2nd District")
A multi-panel summary of relationships between ACS variables and 2016 Trump margins.
gen %>%
left_join(uspols::uspols_dk_pres,
by = c("state_abbrev", "district_code")) %>%
filter(year == 2016) %>%
mutate(Trump_margins = republican - democrat) %>%
ggplot(aes(y = Trump_margins,
x = estimate,
color = variable))+
geom_point(size =1) + #
geom_smooth(method="loess", se=T, color = 'black', linetype = 3) +
scale_color_manual( values = colorRampPalette(ggthemes::economist_pal()(6))(12)) +
theme_minimal() +
theme(legend.position = "none")+
facet_wrap(~variable, scales = 'free_x') +
labs(title = "2019 ACS estimates vs. 2016 Trump margins")
White working class formalized in US Census terms: Population 25 years & older who (1) identify as both White & non-Hispanic and (2) have not obtained a Bachelor’s degree (or higher). Per Table C15002: Sex by educational attainment for the population 25 years and over.
Note: In previous version of this document, I did not correctly count the White working class.
white_ed_vars <- c(white_m_bach = 'C15002H_006',
white_w_bach = 'C15002H_011',
white_pop = 'C15002H_001',
all_w_xbach = 'C15002_016',
all_w_xgrad = 'C15002_017',
all_m_xbach = 'C15002_008',
all_m_xgrad = 'C15002_009',
all_pop = 'C15002_001')
Categories include:
Note: The “and/or Hispanic” piece is slightly confusing here. For most people, Hispanic = Brown = Race; from this perspective, ethnicity (as distinct from race) is not a meaningful distinction. We include it here because it is included in the census.
white_ed <- tidycensus::get_acs(geography = 'congressional district',
variables = white_ed_vars,
year = 2019,
survey = 'acs1') %>%
select(-moe) %>%
spread(key = variable, value = estimate) %>%
mutate(white_college =
white_m_bach +
white_w_bach,
white_working =
white_pop -
white_college,
non_white_college =
all_m_xbach +
all_w_xbach +
all_m_xgrad +
all_w_xgrad -
white_college,
non_white_working =
all_pop -
white_pop -
non_white_college) %>%
select(GEOID, white_college:non_white_working) %>%
gather(-GEOID, key = 'group', value = 'estimate') %>%
group_by(GEOID) %>%
mutate(per = round(estimate/ sum(estimate) * 100, 1)) %>%
ungroup() %>%
mutate(state_code = substr(GEOID, 1, 2),
district_code = substr(GEOID, 3, 4)) %>%
left_join(states, by = c('state_code')) %>%
select(GEOID, state_code, state_abbrev,
district_code, group, per, estimate)
set.seed(99)
samp_n <- sample(unique(white_ed $GEOID), 12)
white_ed %>%
filter(GEOID %in% samp_n) %>%
ggplot(aes(area = per,
fill = group,
label = gsub('_', '-', toupper(group)),
subgroup = group))+
treemapify::geom_treemap(alpha=.65)+
treemapify::geom_treemap_subgroup_border(color = 'white') +
treemapify::geom_treemap_text(colour = "black",
place = "topleft",
reflow = T,
size = 8) +
scale_fill_manual(values = c('#8faabe', '#1a476f',
'#dae2ba', '#55752f')) +
theme_minimal() +
facet_wrap(~paste0(state_abbrev, '-', district_code)) +
theme(legend.position = "none") +
labs(title = "Race-work class distributions",
caption = 'Source: ACS 1-Year estimates, 2019, Table C15002')
A state-based map in pie charts. Race-work class counts for states based on aggregate of congressional district counts.
white_ed2_state <- white_ed %>%
group_by(state_abbrev, group) %>%
summarise(estimate = sum(estimate)) %>%
mutate(per = round(estimate/sum(estimate) * 100, 1)) %>%
ungroup()
Calculate centroids for the equal-area state geometry.
cents <- sf::st_centroid(uspols::xsf_TileOutv10) %>%
sf::st_coordinates() %>%
data.frame() %>%
bind_cols(state_abbrev = uspols::xsf_TileOutv10$state_abbrev,
FIPS = uspols::xsf_TileOutv10$FIPS) %>%
left_join(white_ed2_state %>%
select(-estimate) %>%
spread(group, per))
ggplot() +
geom_sf(data = uspols::xsf_TileOutv10,
color = 'gray') +
scatterpie::geom_scatterpie(data = cents,
aes(x = X,
y = Y,
group = state_abbrev,
r = .17),
cols = colnames(cents)[5:8],
color = 'white',
alpha = 0.75) +
ggsflabel::geom_sf_text(data = uspols::xsf_TileInv10,
aes(label = state_abbrev),
size = 3,
color='black') +
scale_fill_manual(values = c('#8faabe', '#1a476f',
'#dae2ba', '#55752f')) +
theme_minimal()+
theme(axis.text.x=element_blank(),
axis.text.y=element_blank(),
axis.title.x=element_blank(),
axis.title.y=element_blank(),
legend.position = 'bottom',
legend.title=element_blank()) +
labs(title = "Race-work class distributions in America",
caption = 'Source: ACS 1-Year estimates, 2019, Table C15002')
Degree of “rurality” operationalized as the size/geographic-area of a given congressional district (in log sq meters).
uscds <- uscds %>%
mutate(CD_AREA = round(log(as.numeric(
gsub(' m^2]', '', sf::st_area(.)))), 2))
bp <- white_ed %>%
select(GEOID, state_abbrev, district_code, group, per) %>%
filter(group == 'white_working') %>%
left_join(uscds %>% select(-district_code), by = 'GEOID') %>%
left_join(uspols::uspols_dk_pres %>% filter(year == 2016),
by = c("state_abbrev", "district_code")) %>%
filter(CD_AREA < 27) %>%
mutate(GEO = ifelse(state_abbrev %in% south,
'Southern CD', 'Non-southern CD'),
Trump_margins = republican - democrat) %>%
filter(!is.na(Trump_margins))
bp %>%
ggplot(aes(y = per,
x = CD_AREA,
shape = GEO,
color = Trump_margins))+
geom_point(size = 2.75) +
scale_color_distiller(palette = "RdYlBu",
limit = max(abs(bp$Trump_margins)) * c(-1, 1)) +
#theme_minimal() +
xlab('Congressional district area (log sq meters)') +
ylab('% White working class') +
theme(legend.position = "bottom") +
labs(title = "Degree of rurality ~ % White working ~ 2016 Trump margins")